# Kalmia analyze fruit size at end of season
# Using fruit sizes calculated from image segmentation in Python with opencv
#  Callin Switzer
# 20 October 2016
# 10 Feb 2017 Update: Conducted Statistical Modeling with LMER and GLMER
# 11 March 2017 Update: added random effect to MER models to account for plant lineage

Read in data

ipak <- function(pkg){
     new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
     if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
     sapply(pkg, require, character.only = TRUE)
}

packages <- c("ggplot2", 'lme4', 'plyr', 'influence.ME', 'sjPlot', 'multcomp', 'lsmeans', 'Matrix')
ipak(packages)
##      ggplot2         lme4         plyr influence.ME       sjPlot 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
##     multcomp      lsmeans       Matrix 
##         TRUE         TRUE         TRUE
setwd("/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/DatasetsSupplemental/")

theme_set(theme_classic())

kfrt <- read.csv("kalmiaFruitFinal.csv", stringsAsFactors = FALSE)
nrow(kfrt[kfrt$dia_mm == 20.0,]) # number of images taken
## [1] 92
# clean and process data
kfrt <- kfrt[kfrt$dia_mm != 20.0, ] # 20 is the reference object in segmented images
nrow(kfrt) # number of total fruits measured
## [1] 1305
# label treatmens and access numbers
kfrt$trt <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][2])
kfrt$accessNum <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][1])

kfrt$trt <- mapvalues(kfrt$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"))
# add plant lineage (all plants that start with 1129 are from the lineage, 673_69 )
plantInds <- 1:nrow(kfrt) %in% grep(pattern = "1129", x = kfrt$plantNum)
kfrt$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
     ifelse(plantInds[x], "673_69", paste(strsplit(kfrt$plantNum[x], split = "_")[[1]][1:2], collapse = "_") )})

Add plants with 0 count to the dataset

# count number of fruits
counts = as.data.frame(table(kfrt$plantNum))
counts$trt = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][2])

counts$trt <- mapvalues(counts$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"))


counts$accessNum = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][1])


# add in the trts and accession numbers that had counts of 0
# read in error-checked datasheet
kalNotes <- read.csv("KalmiaDailyDatasheets_ErrorChecked.csv")

# get the accession numbers I should have
accNumsHave <- unique(kalNotes$Plant.Number)
accNumsHave <- accNumsHave[!(accNumsHave %in% c("", "Plant Number"))]

#change formatting to match above
accNumsHave <- gsub("#", "", accNumsHave)
accNumsHave <- gsub("\ ", "_", accNumsHave)
accNumsHave <- gsub("\\-", "_", accNumsHave)
accNumsHave <- gsub("\\*", "_", accNumsHave)
accNumsHave <-toupper(accNumsHave)

shouldHave <- as.data.frame(t(sapply(accNumsHave, function(x) as.character(paste(x, c(1:4), sep = "__")))))
row.names(shouldHave) <- NULL
shouldHave1 <- c(as.character(shouldHave[, 1]), 
                 as.character(shouldHave[, 2]), 
                 as.character(shouldHave[, 3]), 
                 as.character(shouldHave[, 4]))

# find missing ones -- this is the ones that
# are in the daily datasheet that aren't in the fruit measurements
missingTrts <- setdiff(shouldHave1,unique(kfrt$plantNum[kfrt$trt != "5"]))

# this should be 0 -- the ones from the fruit measurements that aren't in the daily datasheet
setdiff(unique(kfrt$plantNum[kfrt$trt != "Unbagged_2"]), shouldHave1)
## character(0)
# here's the list of plants that had their bags/tags go missing during the experiment (meaning that
# I was unable to collect fruits)
# note: "677_66_MASS__1" was the only plant that was missing a tag during the final collection
# that wasn't missing in one of my previous checks. 
NAList <- c("1129_74_E__4", "1129_74_C__4", "39_60_A__3", "685_70_A__2", "677_66_MASS__1")

# note: this ignores trt#5 which is the same as #3
ZeroFruitPlants <- missingTrts[!(missingTrts %in% NAList)]

zfdf <- data.frame(Var1 = ZeroFruitPlants, 
                   Freq = 0, 
                   trt = sapply(X = 1:length(ZeroFruitPlants), 
                                FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]), 
                                                           split = "__")[[1]][2]),
                   accessNum = sapply(X = 1:length(ZeroFruitPlants), 
                                            FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]), 
                                                                       split = "__")[[1]][1])
                   )

zfdf$trt <- mapvalues(zfdf$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"))
## The following `from` values were not present in `x`: 3, 4, 5
countFin <- rbind(counts, zfdf)


# final fruit counts for the fruits collected at the end of the experiment
countFin <- countFin[order(countFin$accessNum, countFin$trt, decreasing = FALSE), ]

# change labels from unbagged to conntrol
countFin$trt <- mapvalues(countFin$trt, c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2")
                          , c("Bagged", "Bagged & Selfed", "Control", 
                                                "Unbagged & Outcrossed", "Control_2"))
# change levels, so that control is first
countFin$trt <- factor(countFin$trt, levels = c("Control","Control_2", "Bagged", "Bagged & Selfed", 
                                                "Unbagged & Outcrossed"))

plantInds <- 1:nrow(countFin) %in% grep(pattern = "1129", x = countFin$accessNum)
countFin$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
     ifelse(plantInds[x], "673_69", paste(strsplit(countFin$accessNum[x], split = "_")[[1]][1:2], collapse = "_") )})

Visualize counts of fruits

ggplot(countFin, aes(x = trt , y = Freq, fill = trt)) + 
     geom_boxplot() +
    # geom_violin()+
     
     labs(y = "Number of fruits", x = "Treatment") + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

saveDir <- "/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/Media/"
ggsave(paste0(saveDir, "KalmiaFruitNumber.pdf"), width = 10, height = 8)

Visualize counts of fruits (ignore treatment #5)

ggplot(countFin[!(countFin$trt %in% 'Control_2'), ], aes(x = trt , y = Freq, fill = trt)) + 
     geom_boxplot() +
     # geom_point(aes(color = plantLineage)) + 
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 35, hjust = 1), 
           legend.position = 'none') + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

ggsave(paste0(saveDir, "KalmiaFruitNumber_trt1_4Only.pdf"), width = 5, height = 4)

Use GLMM’s to find differences in number of fruits

First, summarize the dataset that I will be using

# I'm ignoring control#2, because it was originally intended for another experiment (analysis wasn't planned)
cf1 <- droplevels(countFin[countFin$trt != "Control_2",])
nrow(countFin) # number of total counts, including Control2
## [1] 104
sum(cf1$Freq) # total number of fruits in the analysis of count
## [1] 1035
# Number of counts, excluding control_2
# this is different from the number of photos
# because I didn't take photos on 0-count plants
nrow(cf1) 
## [1] 84
data.frame(table(cf1$trt)) # number of plants in each treatment that we are analyzing
##                    Var1 Freq
## 1               Control   21
## 2                Bagged   22
## 3       Bagged & Selfed   21
## 4 Unbagged & Outcrossed   20
data.frame(table(cf1$accessNum)) # shows number of counts per plant -- 4 means that we counted all four treatments
##           Var1 Freq
## 1    1129_74_A    4
## 2    1129_74_B    4
## 3    1129_74_C    3
## 4    1129_74_E    3
## 5    1129_74_F    4
## 6    12_2006_A    4
## 7  128_96_MASS    4
## 8  132_98_MASS    4
## 9     150_58_A    4
## 10    232_46_A    4
## 11     35_40_C    4
## 12     39_60_A    3
## 13    425_74_D    4
## 14    440_66_A    4
## 15     46_18_A    4
## 16 572_64_MASS    4
## 17    624_64_B    4
## 18 667_66_MASS    4
## 19    673_69_B    4
## 20    685_70_A    3
## 21    721_79_B    4
## 22 UNLABELED_1    4
data.frame(table(cf1$plantLineage))
##           Var1 Freq
## 1      12_2006    4
## 2       128_96    4
## 3       132_98    4
## 4       150_58    4
## 5       232_46    4
## 6        35_40    4
## 7        39_60    3
## 8       425_74    4
## 9       440_66    4
## 10       46_18    4
## 11      572_64    4
## 12      624_64    4
## 13      667_66    4
## 14      673_69   22
## 15      685_70    3
## 16      721_79    4
## 17 UNLABELED_1    4
# shows which treatments / plants are missing from analysis
# this is different from the plants that just had 0 counts
data.frame(table(interaction(cf1$accessNum , cf1$trt))) 
##                                 Var1 Freq
## 1                  1129_74_A.Control    1
## 2                  1129_74_B.Control    1
## 3                  1129_74_C.Control    1
## 4                  1129_74_E.Control    1
## 5                  1129_74_F.Control    1
## 6                  12_2006_A.Control    1
## 7                128_96_MASS.Control    1
## 8                132_98_MASS.Control    1
## 9                   150_58_A.Control    1
## 10                  232_46_A.Control    1
## 11                   35_40_C.Control    1
## 12                   39_60_A.Control    0
## 13                  425_74_D.Control    1
## 14                  440_66_A.Control    1
## 15                   46_18_A.Control    1
## 16               572_64_MASS.Control    1
## 17                  624_64_B.Control    1
## 18               667_66_MASS.Control    1
## 19                  673_69_B.Control    1
## 20                  685_70_A.Control    1
## 21                  721_79_B.Control    1
## 22               UNLABELED_1.Control    1
## 23                  1129_74_A.Bagged    1
## 24                  1129_74_B.Bagged    1
## 25                  1129_74_C.Bagged    1
## 26                  1129_74_E.Bagged    1
## 27                  1129_74_F.Bagged    1
## 28                  12_2006_A.Bagged    1
## 29                128_96_MASS.Bagged    1
## 30                132_98_MASS.Bagged    1
## 31                   150_58_A.Bagged    1
## 32                   232_46_A.Bagged    1
## 33                    35_40_C.Bagged    1
## 34                    39_60_A.Bagged    1
## 35                   425_74_D.Bagged    1
## 36                   440_66_A.Bagged    1
## 37                    46_18_A.Bagged    1
## 38                572_64_MASS.Bagged    1
## 39                   624_64_B.Bagged    1
## 40                667_66_MASS.Bagged    1
## 41                   673_69_B.Bagged    1
## 42                   685_70_A.Bagged    1
## 43                   721_79_B.Bagged    1
## 44                UNLABELED_1.Bagged    1
## 45         1129_74_A.Bagged & Selfed    1
## 46         1129_74_B.Bagged & Selfed    1
## 47         1129_74_C.Bagged & Selfed    1
## 48         1129_74_E.Bagged & Selfed    1
## 49         1129_74_F.Bagged & Selfed    1
## 50         12_2006_A.Bagged & Selfed    1
## 51       128_96_MASS.Bagged & Selfed    1
## 52       132_98_MASS.Bagged & Selfed    1
## 53          150_58_A.Bagged & Selfed    1
## 54          232_46_A.Bagged & Selfed    1
## 55           35_40_C.Bagged & Selfed    1
## 56           39_60_A.Bagged & Selfed    1
## 57          425_74_D.Bagged & Selfed    1
## 58          440_66_A.Bagged & Selfed    1
## 59           46_18_A.Bagged & Selfed    1
## 60       572_64_MASS.Bagged & Selfed    1
## 61          624_64_B.Bagged & Selfed    1
## 62       667_66_MASS.Bagged & Selfed    1
## 63          673_69_B.Bagged & Selfed    1
## 64          685_70_A.Bagged & Selfed    0
## 65          721_79_B.Bagged & Selfed    1
## 66       UNLABELED_1.Bagged & Selfed    1
## 67   1129_74_A.Unbagged & Outcrossed    1
## 68   1129_74_B.Unbagged & Outcrossed    1
## 69   1129_74_C.Unbagged & Outcrossed    0
## 70   1129_74_E.Unbagged & Outcrossed    0
## 71   1129_74_F.Unbagged & Outcrossed    1
## 72   12_2006_A.Unbagged & Outcrossed    1
## 73 128_96_MASS.Unbagged & Outcrossed    1
## 74 132_98_MASS.Unbagged & Outcrossed    1
## 75    150_58_A.Unbagged & Outcrossed    1
## 76    232_46_A.Unbagged & Outcrossed    1
## 77     35_40_C.Unbagged & Outcrossed    1
## 78     39_60_A.Unbagged & Outcrossed    1
## 79    425_74_D.Unbagged & Outcrossed    1
## 80    440_66_A.Unbagged & Outcrossed    1
## 81     46_18_A.Unbagged & Outcrossed    1
## 82 572_64_MASS.Unbagged & Outcrossed    1
## 83    624_64_B.Unbagged & Outcrossed    1
## 84 667_66_MASS.Unbagged & Outcrossed    1
## 85    673_69_B.Unbagged & Outcrossed    1
## 86    685_70_A.Unbagged & Outcrossed    1
## 87    721_79_B.Unbagged & Outcrossed    1
## 88 UNLABELED_1.Unbagged & Outcrossed    1

Create a model

Account for plant lineage and accession number

m1 <- glmer(Freq ~ trt + (1|plantLineage) +  (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    709.4    724.0   -348.7    697.4       78 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8555 -1.3244 -0.3544  1.1650  5.5802 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.2085   0.4566  
##  plantLineage (Intercept) 0.2278   0.4773  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.04258    0.17623  11.590  < 2e-16 ***
## trtBagged                -0.97065    0.12274  -7.908 2.62e-15 ***
## trtBagged & Selfed        0.18796    0.08836   2.127   0.0334 *  
## trtUnbagged & Outcrossed  0.74998    0.08352   8.980  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.200              
## trtBggd&Slf -0.275  0.396       
## trtUnbggd&O -0.312  0.421  0.583
# calculate LRT for trt
m2 <- update(m1, .~. - trt)
anova(m1, m2) # highly significant
## Data: cf1
## Models:
## m2: Freq ~ (1 | plantLineage) + (1 | accessNum)
## m1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Df    AIC     BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m2  3 999.49 1006.79 -496.75   993.49                             
## m1  6 709.42  724.01 -348.71   697.42 296.07      3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model diagnostics

m1 <- glmer(Freq ~ trt + (1|plantLineage) +  (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    709.4    724.0   -348.7    697.4       78 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8555 -1.3244 -0.3544  1.1650  5.5802 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.2085   0.4566  
##  plantLineage (Intercept) 0.2278   0.4773  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.04258    0.17623  11.590  < 2e-16 ***
## trtBagged                -0.97065    0.12274  -7.908 2.62e-15 ***
## trtBagged & Selfed        0.18796    0.08836   2.127   0.0334 *  
## trtUnbagged & Outcrossed  0.74998    0.08352   8.980  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.200              
## trtBggd&Slf -0.275  0.396       
## trtUnbggd&O -0.312  0.421  0.583
# diagnostics
# qq plot
qqnorm(resid(m1), main = "")
qqline(resid(m1)) # outliers

# residual plot
plot(fitted(m1), resid(m1), xlab = "fitted", ylab = "residuals")
abline(0,0)

# possibly outliers


# QQPlot for group-level effects
qqnorm(ranef(m1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$accessNum[[1]]) 

# # QQPlot for group-level effects
qqnorm(ranef(m1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$plantLineage[[1]])

infl <- influence(m1, obs = TRUE) # takes a while to calculate
# x11()
plot(infl, which = 'cook') # some influential points

# visualize model: 
sjp.lmer(m1, type = 'fe')

sjp.lmer(m1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...

#check assumptions of model 
overdisp_fun <- function(model) {
  ## number of variance parameters in 
  ##   an n-by-n variance-covariance matrix
  vpars <- function(m) {
    nrow(m)*(nrow(m)+1)/2
  }
  model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model))
  rdf <- nrow(model.frame(model))-model.df
  rp <- residuals(model,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}

overdisp_fun(m1) # shows overdispersion
##        chisq        ratio          rdf            p 
## 2.727300e+02 3.496539e+00 7.800000e+01 2.078860e-23
# here's another way to check for overdispersion
residDev <- sum(residuals(m1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1) 
## [1] 4.072153

Use negative binomial model, since the poisson model is overdispersed

m1.1 <- glmer.nb(Freq ~ trt +(1|plantLineage) +  (1|accessNum), data = cf1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00161649 (tol =
## 0.001, component 1)
summary(m1.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(2.1736)  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    571.9    588.9   -279.0    557.9       77 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3912 -0.5687 -0.1032  0.4856  2.6633 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.07951  0.2820  
##  plantLineage (Intercept) 0.33722  0.5807  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.06410    0.23661   8.724  < 2e-16 ***
## trtBagged                -1.06039    0.25396  -4.175 2.97e-05 ***
## trtBagged & Selfed        0.04771    0.24127   0.198 0.843254    
## trtUnbagged & Outcrossed  0.78092    0.23708   3.294 0.000988 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.465              
## trtBggd&Slf -0.487  0.479       
## trtUnbggd&O -0.533  0.457  0.469
# get overall p-value for treatment
m2.1 <- update(m1.1, .~. - trt)
anova(m1.1, m2.1, test = 'LRT') # LRT for trt for paper
## Data: cf1
## Models:
## m2.1: Freq ~ (1 | plantLineage) + (1 | accessNum)
## m1.1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##      Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## m2.1  4 618.65 628.37 -305.32   610.65                            
## m1.1  7 571.92 588.93 -278.96   557.92 52.73      3  2.093e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Negative binomial model diagnostics

overdisp_fun(m1.1) 
##      chisq      ratio        rdf          p 
## 60.3298837  0.7734600 78.0000000  0.9310811
# here's another way to check for overdispersion
residDev <- sum(residuals(m1.1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1.1) 
## [1] 1.151896
# diagnostics
# qq plot
qqnorm(resid(m1.1), main = "")
qqline(resid(m1.1)) # a little better

# residual plot
plot(fitted(m1.1), resid(m1.1), xlab = "fitted", ylab = "residuals")
abline(0,0) # residuals look better for this model

# QQPlot for group-level effects
qqnorm(ranef(m1.1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1.1)$accessNum[[1]])

qqnorm(ranef(m1.1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1.1)$plantLineage[[1]]) 

infl <- influence(m1.1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1

# visualize model: 
sjp.lmer(m1.1, type = 'fe')

sjp.lmer(m1.1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...

# get estimated dispersion parameter for NB Model
getME(m1.1, "glmer.nb.theta") # 2.17
## [1] 2.173551
# post-hoc pairwise comparisons with adjusted p-values
# from documentation: 
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(m1.1, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: glmer(formula = Freq ~ trt + (1 | plantLineage) + (1 | accessNum), 
##     data = cf1, family = negative.binomial(theta = 2.17355065418305))
## 
## Linear Hypotheses:
##                                              Estimate Std. Error z value
## Control - Bagged == 0                         1.06039    0.25396   4.175
## Control - Bagged & Selfed == 0               -0.04771    0.24127  -0.198
## Control - Unbagged & Outcrossed == 0         -0.78092    0.23708  -3.294
## Bagged - Bagged & Selfed == 0                -1.10810    0.25307  -4.379
## Bagged - Unbagged & Outcrossed == 0          -1.84131    0.25634  -7.183
## Bagged & Selfed - Unbagged & Outcrossed == 0 -0.73321    0.24645  -2.975
##                                              Pr(>|z|)    
## Control - Bagged == 0                         < 0.001 ***
## Control - Bagged & Selfed == 0                0.99726    
## Control - Unbagged & Outcrossed == 0          0.00558 ** 
## Bagged - Bagged & Selfed == 0                 < 0.001 ***
## Bagged - Unbagged & Outcrossed == 0           < 0.001 ***
## Bagged & Selfed - Unbagged & Outcrossed == 0  0.01530 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Visualize annotated plot for fruit counts

count_pub <- within(countFin, {
     trt <- mapvalues(trt, from = c("Control","Control_2", "Bagged", "Bagged & Selfed", 
                                                "Unbagged & Outcrossed"), 
                      to = c("Control","Control_2", "Bagged", "Bagged\n & Selfed", 
                                                "Outcrossed"))
})

count_pub <- droplevels(count_pub)


ggplot(count_pub[!(count_pub$trt %in% 'Control_2'), ], aes(x = trt , y = Freq+ 0.1)) + 
     geom_boxplot(width = 0.2, fill = 'grey80') +
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') + 
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 10, label=c("a", "b", "a", "c"),
               color="black") + 
     scale_y_log10(breaks = c(1, 10, 100), limits = c(0.1, 100))

ggsave(paste0(saveDir, "KalmiaFruitNumber_logScale.pdf"), width = 3, height = 4)

Visualize CI’s for each of the groups

# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
cf1.1 <- within(cf1, {
     trt = as.factor(as.character(cf1$trt))
})

# refit model with different reference levels
m1.2 <- glmer.nb(Freq ~ trt + (1|plantLineage) + (1|accessNum), data = cf1.1)
summary(m1.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(2.1735)  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1.1
## 
##      AIC      BIC   logLik deviance df.resid 
##    571.9    588.9   -279.0    557.9       77 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3912 -0.5687 -0.1032  0.4856  2.6633 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.07951  0.2820  
##  plantLineage (Intercept) 0.33723  0.5807  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0037     0.2542   3.949 7.84e-05 ***
## trtBagged & Selfed         1.1081     0.2531   4.379 1.19e-05 ***
## trtControl                 1.0604     0.2540   4.175 2.98e-05 ***
## trtUnbagged & Outcrossed   1.8413     0.2563   7.183 6.81e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtB&S trtCnt
## trtBggd&Slf -0.544              
## trtControl  -0.566  0.547       
## trtUnbggd&O -0.598  0.532  0.568
# may help solve convergence issues-- NOPE!
pframe <- data.frame(trt = levels(droplevels(cf1.1$trt)), plantLineage = 99999)
pframe$Freq <- 0
pp <- predict(m1.2, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
## I think that sometimes the model doesn't converge, because it estimates variance
## for random effects to be 0.
##  according to this: http://rstudio-pubs-static.s3.amazonaws.com/24365_2803ab8299934e888a60e7b16113f619.html
## you can safely ignore the warnings about convergence

bb2 <- bootMer(m1.2, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = 999)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00321899 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00133798 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00287675 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00627154 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00222816 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00155286 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0039832 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.02287 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00613069 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00306586 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.285398 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00187441 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00395396 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00139004 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00109059 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00116152 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0033603 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00382423 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00373438 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0116561 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00589979 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0011311 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00317879 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00573381 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00443951 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00117369 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00697235 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00898725 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00119292 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00317499 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00683289 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00272453 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00127228 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0017724 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00556343 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.003275 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00634127 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0135418 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00156614 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00155926 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00163296 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00387724 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00209908 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0012554 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00840774 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00793418 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00300328 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00466504 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00385997 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00140893 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00173394 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.114761 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00113881 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0800753 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00899045 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00220976 (tol =
## 0.001, component 1)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp


# This method is the same, but less straightforward.
# mm <- model.matrix(terms(m1.2), pframe)
# predFun<-function(.) exp(mm%*%fixef(.) )
# bb<-bootMer(m1.2,FUN=predFun,nsim=200) #do this 200 times
# 
# # as we did this 200 times the 95% CI will be bordered by the 5th and 195th value
# bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
# pframe$blo<-bb_se[1,]
# pframe$bhi<-bb_se[2,]
# pframe$predMean <- pp

pframe$median = tapply(cf1.1$Freq, INDEX = cf1.1$trt, median)
pframe$trt <- mapvalues(pframe$trt, from = c("Control", "Bagged", "Bagged & Selfed", 
                                                "Unbagged & Outcrossed"),
                        to = c("Control", "Bagged", "Bagged\n & Selfed", 
                                                "Outcrossed"))
pframe$trt <- relevel(pframe$trt, ref = "Control")

#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
number_ticks <- function(n) {function(limits) pretty(limits, n)}
g0 <- ggplot(pframe, aes(x=trt, y=predMean))+
     geom_point()+ 
     labs(y = "Number of fruits", x = "Treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     scale_y_log10(limits =c(1,60), breaks = number_ticks(6)) + 
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 10, label=c("a", "b", "a", "c"),
               color="black") 
g0

ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_logScale.pdf"), width = 3, height = 4)


## Bootstrap CI on linear scale -- not that great!
g1 <- ggplot(pframe, aes(x=trt, y=predMean))+
     geom_point()+ 
     labs(y = "Number of Fruits", x = "Treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+ 
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 40, label=c("a", "b", "a", "c"),
               color="black") 
g1

ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_LinearScale.pdf"), width = 3, height = 4)


cp <- droplevels(count_pub[count_pub$trt != 'Control_2',])

# this might actually be the best way
ggplot(cp, aes(x = trt , y = Freq)) + 
     geom_boxplot(width = 0.2, fill = 'grey80') +
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') + 
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 1, label=c("a", "b", "a", "c"),
               color="black") 

ggsave(paste0(saveDir, "KalmiaFruitNumber_LinearScale.pdf"), width = 3, height = 4)

Visualize fruit size

# calculate fruit size by plant
sizeDF_mean <- as.data.frame(tapply(kfrt$dia_mm, INDEX = kfrt$plantNum, mean))
colnames(sizeDF_mean) = "meanFrtSz"
sizeDF_mean$trt = sapply(X = 1:length(sizeDF_mean$meanFrtSz), 
                          FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]), 
                                                     split = "__")[[1]][2])

sizeDF_mean$trt <- mapvalues(sizeDF_mean$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Control", 
                                                "Unbagged & Outcrossed", "Control_2"))

# reorder
sizeDF_mean$trt <- factor(sizeDF_mean$trt, levels = c("Control", "Control_2", "Bagged", "Bagged & Selfed",  
                                                "Unbagged & Outcrossed"))

sizeDF_mean$accessNum = sapply(X = 1:length(sizeDF_mean$meanFrtSz), 
                   FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]), 
                                              split = "__")[[1]][1])


ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) + 
     geom_boxplot(alpha = 0.5) + 
#      stat_summary(fun.y=mean, geom="line", aes(group = accessNum, color = accessNum))  + 
#      stat_summary(fun.y=mean, geom="point", aes(group = accessNum, color = accessNum)) + 
     geom_point(aes(color = trt))

ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) + 
     geom_boxplot() + 
     labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

ggsave(paste0(saveDir, "KalmiaFruitDiameter.pdf"), width = 10, height = 8)


# visualize , but exclude treatment 5
ggplot(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ], 
       aes(x = trt, y = meanFrtSz, fill = trt)) + 
     geom_boxplot() + 
     labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") + 
     scale_fill_brewer(name = "Treatment", palette = "Set1") + 
     theme(axis.text.x = element_text(angle = 35, hjust = 1), 
          legend.position = 'none') 

ggsave(paste0(saveDir, "KalmiaFruitDiameter_trt14Only.pdf"), width = 5, height = 4)

Model Fruit Size with LMER

size1 <- within(kfrt, {
     trt <- mapvalues(trt, c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"), 
                      c("Bagged", "Bagged\n & Selfed", "Control", 
                                                "Outcrossed", "Control_2"))
})

size1 <- droplevels(size1[size1$trt != "Control_2",])

# get sample sizes for size dataset
nrow(size1) # total number of fruits in analysis for size
## [1] 1035
data.frame(table(size1$accessNum)) # total number of fruits per plant
##           Var1 Freq
## 1    1129_74_A   93
## 2    1129_74_B   36
## 3    1129_74_C   69
## 4    1129_74_E   43
## 5    1129_74_F   48
## 6    12_2006_A   22
## 7  128_96_MASS   64
## 8  132_98_MASS   17
## 9     150_58_A   39
## 10    232_46_A   13
## 11     35_40_C   70
## 12     39_60_A   37
## 13    425_74_D   94
## 14    440_66_A   53
## 15     46_18_A   19
## 16 572_64_MASS   31
## 17    624_64_B   43
## 18 667_66_MASS   69
## 19    673_69_B  124
## 20    685_70_A   23
## 21    721_79_B   10
## 22 UNLABELED_1   18
data.frame(table(size1$plantLineage)) # total number of fruits per plant lineage
##           Var1 Freq
## 1      12_2006   22
## 2       128_96   64
## 3       132_98   17
## 4       150_58   39
## 5       232_46   13
## 6        35_40   70
## 7        39_60   37
## 8       425_74   94
## 9       440_66   53
## 10       46_18   19
## 11      572_64   31
## 12      624_64   43
## 13      667_66   69
## 14      673_69  413
## 15      685_70   23
## 16      721_79   10
## 17 UNLABELED_1   18
size1$trt <- relevel(as.factor(size1$trt), ref = "Control")
f1 <- lmer(dia_mm ~ trt + (1|plantLineage/accessNum), data = size1)
summary(f1) # final model for paper
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage/accessNum)
##    Data: size1
## 
## REML criterion at convergence: 1852.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8939 -0.6307  0.0377  0.6695  3.4682 
## 
## Random effects:
##  Groups                 Name        Variance Std.Dev.
##  accessNum:plantLineage (Intercept) 0.04439  0.2107  
##  plantLineage           (Intercept) 0.13357  0.3655  
##  Residual                           0.32638  0.5713  
## Number of obs: 1035, groups:  accessNum:plantLineage, 22; plantLineage, 17
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           4.23422    0.11027   38.40
## trtBagged            -0.51953    0.07274   -7.14
## trtBagged\n & Selfed  0.01439    0.05304    0.27
## trtOutcrossed         0.73741    0.04881   15.11
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.179              
## trtBggd&Slf -0.235  0.387       
## trtOutcrssd -0.296  0.404  0.537
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = size1)
summary(f1) # final model for paper (same as above)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: size1
## 
## REML criterion at convergence: 1852.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8939 -0.6307  0.0377  0.6695  3.4682 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.04439  0.2107  
##  plantLineage (Intercept) 0.13357  0.3655  
##  Residual                 0.32638  0.5713  
## Number of obs: 1035, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           4.23422    0.11027   38.40
## trtBagged            -0.51953    0.07274   -7.14
## trtBagged\n & Selfed  0.01439    0.05304    0.27
## trtOutcrossed         0.73741    0.04881   15.11
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.179              
## trtBggd&Slf -0.235  0.387       
## trtOutcrssd -0.296  0.404  0.537
# get p-value for trt
f2 <- update(f1, .~. - trt)
anova(f1, f2, "LRT")
## refitting model(s) with ML (instead of REML)
## Data: size1
## Models:
## f2: dia_mm ~ (1 | plantLineage) + (1 | accessNum)
## f1: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Df    AIC    BIC   logLik deviance Chisq Chi Df Pr(>Chisq)    
## f2  4 2234.7 2254.5 -1113.36   2226.7                            
## f1  7 1851.4 1886.0  -918.71   1837.4 389.3      3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fruit Size Diagnostics

# diagnostics
# qq plot
qqnorm(resid(f1), main = "")
qqline(resid(f1)) # good

# residual plot
plot(fitted(f1), residuals(f1, type = "deviance"), xlab = "fitted", ylab = "residuals")
abline(0,0) 

# QQPlot for group-level effects
qqnorm(ranef(f1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$accessNum[[1]]) # one possible outlier

# QQPlot for group-level effects
qqnorm(ranef(f1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$plantLineage[[1]]) # looks good

infl <- influence(f1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1

# visualize model: 
sjp.lmer(f1, type = 'fe')
## Computing p-values via Kenward-Roger approximation. Use `p.kr = FALSE` if computation takes too long.

# Best Linear Unbiased Predictors (BLUPs)
sjp.lmer(f1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...

# post-hoc pairwise comparisons with adjusted p-values
# from documentation: 
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(f1, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
## Loading required namespace: lmerTest
## Note: df set to 1020
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lmer(formula = dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum), 
##     data = size1)
## 
## Linear Hypotheses:
##                                     Estimate Std. Error t value Pr(>|t|)
## Control - Bagged == 0                0.51953    0.07274   7.142   <1e-06
## Control - Bagged\n & Selfed == 0    -0.01439    0.05304  -0.271    0.993
## Control - Outcrossed == 0           -0.73741    0.04881 -15.107   <1e-06
## Bagged - Bagged\n & Selfed == 0     -0.53392    0.07156  -7.461   <1e-06
## Bagged - Outcrossed == 0            -1.25694    0.06930 -18.138   <1e-06
## Bagged\n & Selfed - Outcrossed == 0 -0.72301    0.04916 -14.709   <1e-06
##                                        
## Control - Bagged == 0               ***
## Control - Bagged\n & Selfed == 0       
## Control - Outcrossed == 0           ***
## Bagged - Bagged\n & Selfed == 0     ***
## Bagged - Outcrossed == 0            ***
## Bagged\n & Selfed - Outcrossed == 0 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Visualize Fruit Size with annotations

# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
sf1<- within(size1, {
     trt = as.factor(as.character(size1$trt))
})

# refit model with different reference levels
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = sf1)
pframe <- data.frame(trt = levels(droplevels(sf1$trt)))
pframe$dia_mm = 0
pp <- predict(f1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
mm <- model.matrix(terms(f1), pframe)
predFun<-function(.) mm%*%fixef(.)
bb<-bootMer(f1,FUN=predFun,nsim=999) #do this 200 times
# get quantiles from bootstrap sample
bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb_se[1,]
pframe$bhi<-bb_se[2,]
pframe$predMean <- pp
pframe # print frame, in case reporting in a table
##                 trt dia_mm      blo      bhi predMean
## 1            Bagged      0 3.474886 3.946818 3.714693
## 2 Bagged\n & Selfed      0 4.029984 4.465712 4.248615
## 3           Control      0 4.015512 4.442943 4.234223
## 4        Outcrossed      0 4.763427 5.175298 4.971629
pframe$trt <- relevel(pframe$trt, ref = "Control")

#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
g2 <- ggplot(pframe, aes(x=trt, y=predMean))+
     geom_point()+ 
     labs(y = "Fruit dia. (mm)", x = "Treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 0.7, label=c("a", "b", "a", "c"),
               color="black") 
g2

ggsave(paste0(saveDir, "KalmiaFruitDia_BSCI.pdf"), width = 3, height = 4)

 ## visualize raw data (mean fruit size per plant)

sdf1 <- droplevels(within(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ], {
     trt <- mapvalues(trt, c("Bagged", "Bagged & Selfed", "Control", 
                                                "Unbagged & Outcrossed", "Control_2"), 
                      c("Bagged", "Bagged\n & Selfed", "Control", 
                                                "Outcrossed", "Control_2"))
}))

# visualize fruit size
ggplot(sdf1, aes(x = trt, y = meanFrtSz)) + 
     geom_boxplot(width = 0.2, fill = 'grey80') +
     labs(y = "Fruit dia. (mm)", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') + 
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 2, label=c("a", "b", "a", "c"),
               color="black")

ggsave(paste0(saveDir, "KalmiaFruitDia.pdf"), width = 3, height = 4)


# compare two plots
g2 + geom_boxplot(data = sdf1, aes(x = trt, y = meanFrtSz), width = 0.2, alpha = 0)

Visualize data to compare fruit size with number of seeds

# read in data
sizeSeed <- read.csv("KalmiaFruitSizeAndSeeds.csv")
plantInds <- 1:nrow(sizeSeed) %in% grep(pattern = "1129", x = sizeSeed$Plant)
sizeSeed$plantLineage <- sapply(1:nrow(sizeSeed), FUN = function(x){
     ifelse(plantInds[x], "673_69", paste(strsplit(as.character(sizeSeed$Plant[x]), split = "_")[[1]][1:2], collapse = "_") )})


ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) + 
     geom_point() + 
     stat_smooth(method = 'lm', formula = y ~ exp(x), se = F) + 
     labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel')

ggsave(paste0(saveDir, "KalmiaFruitSeeds.pdf"), width = 5, height = 4)


# on log scale
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) + 
     geom_point() + 
     stat_smooth(method = 'lm', formula = y ~ x, se = F) + 
     scale_y_continuous(trans="log") + 
     labs(y = "log(e) number of seeds")

# GLMER
ss1 <- glmer(NumSeeds ~ Dia..mm. + (1|plantLineage), family = poisson, data = sizeSeed)
summary(ss1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: NumSeeds ~ Dia..mm. + (1 | plantLineage)
##    Data: sizeSeed
## 
##      AIC      BIC   logLik deviance df.resid 
##    139.2    142.0    -66.6    133.2       16 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.32556 -0.41555  0.01926  0.42617  1.28347 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  plantLineage (Intercept) 0.1422   0.3771  
## Number of obs: 19, groups:  plantLineage, 15
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.02551    0.57541   0.044    0.965    
## Dia..mm.     0.66422    0.12957   5.126 2.96e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##          (Intr)
## Dia..mm. -0.979
ss2 <- update(ss1, .~. - Dia..mm.)
anova(ss1, ss2, test = "LRT") # LRT for paper
## Data: sizeSeed
## Models:
## ss2: NumSeeds ~ (1 | plantLineage)
## ss1: NumSeeds ~ Dia..mm. + (1 | plantLineage)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## ss2  2 158.21 160.09 -77.103   154.21                             
## ss1  3 139.16 141.99 -66.580   133.16 21.047      1  4.482e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
residDev <- sum(residuals(ss1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(ss1) 
## [1] 0.8913075
overdisp_fun(ss1) #not overdispersed
##      chisq      ratio        rdf          p 
## 12.7841497  0.7990094 16.0000000  0.6884697
## model diagnostics 
plot(ss1) #cook's distance

qqnorm(resid(ss1), main = "")
qqline(resid(ss1)) # one outlier

# # QQPlot for group-level effects
qqnorm(ranef(ss1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(ss1)$plantLineage[[1]])

infl <- influence(ss1, obs = TRUE) # takes a while to calculate
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00117623 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00117749 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00105581 (tol =
## 0.001, component 1)
plot(infl, which = 'cook') # some influential points

# get sample size
nrow(sizeSeed) # total number of individual fruits
## [1] 19
sum(sizeSeed$NumSeeds) # total number of seeds counted
## [1] 382
# total plant lineages
length(unique(sizeSeed$plantLineage))
## [1] 15
# plot data with line
preds <- predict(ss1, newdata = data.frame(Dia..mm. = seq(2.5, 5.5, length.out = 100)), re.form = NA, type = 'response')

predDF <- data.frame(Dia..mm. = seq(2.5, 5.5, length.out = 100), preds = preds)


# visualize data
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) + 
     geom_point() + 
     geom_line(data = predDF, aes(x = Dia..mm., y = preds), color = 'blue') +  
     labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel')

ggsave(paste0(saveDir, "KalmiaFruitDia_NumSeeds.pdf"), width = 3, height = 4)